In [8]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import scipy.stats as st
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.mlab as mlab
In [9]:
df = pd.read_csv('E:\Space_Corrected.csv')
In [10]:
df
Out[10]:
Unnamed: 0.1 Unnamed: 0 Company Name Location Datum Detail Status Rocket Rocket Status Mission
0 0 0 SpaceX LC-39A, Kennedy Space Center, Florida, USA Fri Aug 07, 2020 05:12 UTC Falcon 9 Block 5 | Starlink V1 L9 & BlackSky StatusActive 50.0 Success
1 1 1 CASC Site 9401 (SLS-2), Jiuquan Satellite Launch Ce... Thu Aug 06, 2020 04:01 UTC Long March 2D | Gaofen-9 04 & Q-SAT StatusActive 29.75 Success
2 2 2 SpaceX Pad A, Boca Chica, Texas, USA Tue Aug 04, 2020 23:57 UTC Starship Prototype | 150 Meter Hop StatusActive NaN Success
3 3 3 Roscosmos Site 200/39, Baikonur Cosmodrome, Kazakhstan Thu Jul 30, 2020 21:25 UTC Proton-M/Briz-M | Ekspress-80 & Ekspress-103 StatusActive 65.0 Success
4 4 4 ULA SLC-41, Cape Canaveral AFS, Florida, USA Thu Jul 30, 2020 11:50 UTC Atlas V 541 | Perseverance StatusActive 145.0 Success
... ... ... ... ... ... ... ... ... ...
4319 4319 4319 US Navy LC-18A, Cape Canaveral AFS, Florida, USA Wed Feb 05, 1958 07:33 UTC Vanguard | Vanguard TV3BU StatusRetired NaN Failure
4320 4320 4320 AMBA LC-26A, Cape Canaveral AFS, Florida, USA Sat Feb 01, 1958 03:48 UTC Juno I | Explorer 1 StatusRetired NaN Success
4321 4321 4321 US Navy LC-18A, Cape Canaveral AFS, Florida, USA Fri Dec 06, 1957 16:44 UTC Vanguard | Vanguard TV3 StatusRetired NaN Failure
4322 4322 4322 RVSN USSR Site 1/5, Baikonur Cosmodrome, Kazakhstan Sun Nov 03, 1957 02:30 UTC Sputnik 8K71PS | Sputnik-2 StatusRetired NaN Success
4323 4323 4323 RVSN USSR Site 1/5, Baikonur Cosmodrome, Kazakhstan Fri Oct 04, 1957 19:28 UTC Sputnik 8K71PS | Sputnik-1 StatusRetired NaN Success

4324 rows × 9 columns

In [11]:
df = df.drop(['Unnamed: 0', 'Unnamed: 0.1'], axis = 1)
df.head()
Out[11]:
Company Name Location Datum Detail Status Rocket Rocket Status Mission
0 SpaceX LC-39A, Kennedy Space Center, Florida, USA Fri Aug 07, 2020 05:12 UTC Falcon 9 Block 5 | Starlink V1 L9 & BlackSky StatusActive 50.0 Success
1 CASC Site 9401 (SLS-2), Jiuquan Satellite Launch Ce... Thu Aug 06, 2020 04:01 UTC Long March 2D | Gaofen-9 04 & Q-SAT StatusActive 29.75 Success
2 SpaceX Pad A, Boca Chica, Texas, USA Tue Aug 04, 2020 23:57 UTC Starship Prototype | 150 Meter Hop StatusActive NaN Success
3 Roscosmos Site 200/39, Baikonur Cosmodrome, Kazakhstan Thu Jul 30, 2020 21:25 UTC Proton-M/Briz-M | Ekspress-80 & Ekspress-103 StatusActive 65.0 Success
4 ULA SLC-41, Cape Canaveral AFS, Florida, USA Thu Jul 30, 2020 11:50 UTC Atlas V 541 | Perseverance StatusActive 145.0 Success
In [12]:
df.describe()
Out[12]:
Company Name Location Datum Detail Status Rocket Rocket Status Mission
count 4324 4324 4324 4324 4324 964 4324
unique 56 137 4319 4278 2 56 4
top RVSN USSR Site 31/6, Baikonur Cosmodrome, Kazakhstan Wed Nov 05, 2008 00:15 UTC Cosmos-3MRB (65MRB) | BOR-5 Shuttle StatusRetired 450.0 Success
freq 1777 235 2 6 3534 136 3879
In [13]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4324 entries, 0 to 4323
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Company Name    4324 non-null   object
 1   Location        4324 non-null   object
 2   Datum           4324 non-null   object
 3   Detail          4324 non-null   object
 4   Status Rocket   4324 non-null   object
 5    Rocket         964 non-null    object
 6   Status Mission  4324 non-null   object
dtypes: object(7)
memory usage: 236.6+ KB
In [14]:
ds = df["Company Name"].value_counts().reset_index()[:28]
ds
Out[14]:
index Company Name
0 RVSN USSR 1777
1 Arianespace 279
2 CASC 251
3 General Dynamics 251
4 NASA 203
5 VKS RF 201
6 US Air Force 161
7 ULA 140
8 Boeing 136
9 Martin Marietta 114
10 SpaceX 100
11 MHI 84
12 Northrop 83
13 Lockheed 79
14 ISRO 76
15 Roscosmos 55
16 ILS 46
17 Sea Launch 36
18 ISAS 30
19 Kosmotras 22
20 US Navy 17
21 ISA 13
22 Rocket Lab 13
23 Eurockot 13
24 ESA 13
25 Blue Origin 12
26 IAI 11
27 ExPace 10
In [15]:
import plotly.graph_objects as go
fig = go.Figure(go.Bar(x = ds["index"],
                       y = ds["Company Name"],
                       marker = dict( color = ds["Company Name"],
                                    colorscale = "bluered")))
fig.update_layout(title = "Number of Launches by Every Company", xaxis_title = "Top 28 Country", yaxis_title = "count", hovermode = "x")
fig.show()
In [16]:
ds = df["Status Rocket"].value_counts().reset_index()
ds
Out[16]:
index Status Rocket
0 StatusRetired 3534
1 StatusActive 790
In [17]:
import plotly.express as px
fig = px.pie(ds, values = "Status Rocket", names = "index", title = "Rocket Status")
fig.show()
In [19]:
ds = df["Status Mission"].value_counts().reset_index()
ds
Out[19]:
index Status Mission
0 Success 3879
1 Failure 339
2 Partial Failure 102
3 Prelaunch Failure 4
In [20]:
fig = px.bar(ds, x = "index", y = "Status Mission", title = "Mission Status")
fig.show()
In [21]:
np.sum(pd.isna(df.loc[:," Rocket"]))
Out[21]:
3360
In [22]:
df_ = df.dropna(subset=[" Rocket"], axis = "rows")
len(df_)
Out[22]:
964
In [23]:
df_.loc[:, " Rocket"]
Out[23]:
0        50.0 
1       29.75 
3        65.0 
4       145.0 
5       64.68 
         ...  
3855     59.0 
3971    63.23 
3993    63.23 
4000    63.23 
4020    63.23 
Name:  Rocket, Length: 964, dtype: object
In [24]:
df_.loc[:, " Rocket"] = df_.loc[:, " Rocket"].fillna(0.0).str.replace(",","")
df_.loc[:, " Rocket"] = df_.loc[:, " Rocket"].astype(np.float64).fillna(0.0)
C:\Users\prava\AppData\Local\Temp\ipykernel_17240\3502872187.py:1: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

C:\Users\prava\AppData\Local\Temp\ipykernel_17240\3502872187.py:2: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [25]:
df_d = df_[df_.loc[:, " Rocket"]<1000]
plt.figure(figsize = (22,6))
sns.histplot(data = df_d, x = " Rocket", hue = "Status Rocket")
plt.show()
In [26]:
np.sum(pd.isna(df.loc[:,"Status Mission"]))
Out[26]:
0
In [27]:
plt.figure(figsize = (22,6))
sns.histplot(data = df_d, x = " Rocket", hue = "Status Mission")
plt.show()
In [31]:
# Total spent money for each company
df_.head()
Out[31]:
Company Name Location Datum Detail Status Rocket Rocket Status Mission
0 SpaceX LC-39A, Kennedy Space Center, Florida, USA Fri Aug 07, 2020 05:12 UTC Falcon 9 Block 5 | Starlink V1 L9 & BlackSky StatusActive 50.00 Success
1 CASC Site 9401 (SLS-2), Jiuquan Satellite Launch Ce... Thu Aug 06, 2020 04:01 UTC Long March 2D | Gaofen-9 04 & Q-SAT StatusActive 29.75 Success
3 Roscosmos Site 200/39, Baikonur Cosmodrome, Kazakhstan Thu Jul 30, 2020 21:25 UTC Proton-M/Briz-M | Ekspress-80 & Ekspress-103 StatusActive 65.00 Success
4 ULA SLC-41, Cape Canaveral AFS, Florida, USA Thu Jul 30, 2020 11:50 UTC Atlas V 541 | Perseverance StatusActive 145.00 Success
5 CASC LC-9, Taiyuan Satellite Launch Center, China Sat Jul 25, 2020 03:13 UTC Long March 4B | Ziyuan-3 03, Apocalypse-10 & N... StatusActive 64.68 Success
In [28]:
df_money = df_.groupby(["Company Name"])[" Rocket"].sum().reset_index()
df_money = df_money[df_money[" Rocket"] > 0]
df_money.head()
Out[28]:
Company Name Rocket
0 Arianespace 16345.00
1 Boeing 1241.00
2 CASC 6340.26
3 EER 20.00
4 ESA 37.00
In [29]:
df_money_ = df_money.sort_values(by = [" Rocket"], ascending = False)
df_money_.head()
Out[29]:
Company Name Rocket
14 NASA 76280.00
0 Arianespace 16345.00
21 ULA 14798.00
16 RVSN USSR 10000.00
2 CASC 6340.26
In [30]:
fig = px.bar(df_money_, x = "Company Name", y = " Rocket", title = "Total Spent Money for each Company")
fig.show()
In [31]:
# Mission Numbers by years
df["date"] = pd.to_datetime(df["Datum"])
df.head()
Out[31]:
Company Name Location Datum Detail Status Rocket Rocket Status Mission date
0 SpaceX LC-39A, Kennedy Space Center, Florida, USA Fri Aug 07, 2020 05:12 UTC Falcon 9 Block 5 | Starlink V1 L9 & BlackSky StatusActive 50.0 Success 2020-08-07 05:12:00+00:00
1 CASC Site 9401 (SLS-2), Jiuquan Satellite Launch Ce... Thu Aug 06, 2020 04:01 UTC Long March 2D | Gaofen-9 04 & Q-SAT StatusActive 29.75 Success 2020-08-06 04:01:00+00:00
2 SpaceX Pad A, Boca Chica, Texas, USA Tue Aug 04, 2020 23:57 UTC Starship Prototype | 150 Meter Hop StatusActive NaN Success 2020-08-04 23:57:00+00:00
3 Roscosmos Site 200/39, Baikonur Cosmodrome, Kazakhstan Thu Jul 30, 2020 21:25 UTC Proton-M/Briz-M | Ekspress-80 & Ekspress-103 StatusActive 65.0 Success 2020-07-30 21:25:00+00:00
4 ULA SLC-41, Cape Canaveral AFS, Florida, USA Thu Jul 30, 2020 11:50 UTC Atlas V 541 | Perseverance StatusActive 145.0 Success 2020-07-30 11:50:00+00:00
In [32]:
df["year"] = df["date"].apply(lambda datetime: datetime.year)
df.head()
Out[32]:
Company Name Location Datum Detail Status Rocket Rocket Status Mission date year
0 SpaceX LC-39A, Kennedy Space Center, Florida, USA Fri Aug 07, 2020 05:12 UTC Falcon 9 Block 5 | Starlink V1 L9 & BlackSky StatusActive 50.0 Success 2020-08-07 05:12:00+00:00 2020
1 CASC Site 9401 (SLS-2), Jiuquan Satellite Launch Ce... Thu Aug 06, 2020 04:01 UTC Long March 2D | Gaofen-9 04 & Q-SAT StatusActive 29.75 Success 2020-08-06 04:01:00+00:00 2020
2 SpaceX Pad A, Boca Chica, Texas, USA Tue Aug 04, 2020 23:57 UTC Starship Prototype | 150 Meter Hop StatusActive NaN Success 2020-08-04 23:57:00+00:00 2020
3 Roscosmos Site 200/39, Baikonur Cosmodrome, Kazakhstan Thu Jul 30, 2020 21:25 UTC Proton-M/Briz-M | Ekspress-80 & Ekspress-103 StatusActive 65.0 Success 2020-07-30 21:25:00+00:00 2020
4 ULA SLC-41, Cape Canaveral AFS, Florida, USA Thu Jul 30, 2020 11:50 UTC Atlas V 541 | Perseverance StatusActive 145.0 Success 2020-07-30 11:50:00+00:00 2020
In [34]:
ds = df["year"].value_counts().reset_index()
ds
Out[34]:
index year
0 1971 119
1 2018 117
2 1977 114
3 1975 113
4 1976 113
... ... ...
59 2010 37
60 2005 37
61 1958 28
62 1959 20
63 1957 3

64 rows × 2 columns

In [35]:
fig = px.bar(ds, x = "index", y = "year", title = "Missions Number by Year")
fig.show()
In [36]:
# Countries and Mission status
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
encoder.fit(df["Status Mission"])
encoder
Out[36]:
LabelEncoder()
In [37]:
colors = {0: "red", 
          1 : "Orange", 
          2 : "Yellow", 
          3 : "Green"}
In [38]:
colors
Out[38]:
{0: 'red', 1: 'Orange', 2: 'Yellow', 3: 'Green'}
In [39]:
countries_dict = {
    'Russia' : 'Russian Federation',
    'New Mexico' : 'USA',
    "Yellow Sea": 'China',
    "Shahrud Missile Test Site": "Iran",
    "Pacific Missile Range Facility": 'USA',
    "Barents Sea": 'Russian Federation',
    "Gran Canaria": 'USA'
}
df["country"] = df["Location"].str.split(", ").str[-1].replace(countries_dict)
In [40]:
df.head()
Out[40]:
Company Name Location Datum Detail Status Rocket Rocket Status Mission date year country
0 SpaceX LC-39A, Kennedy Space Center, Florida, USA Fri Aug 07, 2020 05:12 UTC Falcon 9 Block 5 | Starlink V1 L9 & BlackSky StatusActive 50.0 Success 2020-08-07 05:12:00+00:00 2020 USA
1 CASC Site 9401 (SLS-2), Jiuquan Satellite Launch Ce... Thu Aug 06, 2020 04:01 UTC Long March 2D | Gaofen-9 04 & Q-SAT StatusActive 29.75 Success 2020-08-06 04:01:00+00:00 2020 China
2 SpaceX Pad A, Boca Chica, Texas, USA Tue Aug 04, 2020 23:57 UTC Starship Prototype | 150 Meter Hop StatusActive NaN Success 2020-08-04 23:57:00+00:00 2020 USA
3 Roscosmos Site 200/39, Baikonur Cosmodrome, Kazakhstan Thu Jul 30, 2020 21:25 UTC Proton-M/Briz-M | Ekspress-80 & Ekspress-103 StatusActive 65.0 Success 2020-07-30 21:25:00+00:00 2020 Kazakhstan
4 ULA SLC-41, Cape Canaveral AFS, Florida, USA Thu Jul 30, 2020 11:50 UTC Atlas V 541 | Perseverance StatusActive 145.0 Success 2020-07-30 11:50:00+00:00 2020 USA
In [41]:
from plotly.subplots import make_subplots
fig = make_subplots(rows = 4, cols = 4, subplot_titles = df["country"].unique())
for i, country in enumerate(df["country"].unique()):
    counts = df[df["country"] == country]["Status Mission"].value_counts(normalize = True)*100
    color = [colors[x] for x in encoder.transform(counts.index)]
    trace = go.Bar(x = counts.index, y = counts.values, name = country, marker = {"color" : color}, showlegend = False)
    fig.add_trace(trace, row = (i//4) + 1, col = (i%4)+1)
fig.update_layout(title = {"text":"Countries and Mission Status"}, height = 1000, width = 1100)
for i in range(1,5):
    fig.update_yaxes(title_text = "Percentage", row = i, col = 1)
fig.show()
In [ ]: